From 91e1345d1c287e4798ecc2fc8ed20b0dd70e28c6 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Thu, 3 Apr 2003 18:58:55 +0000 Subject: [PATCH] bitkeeper revision 1.160.1.1 (3e8c846fQSuOz1Dd8MgUzwG5rj3bDQ) Many files: Free DOM0 kernel memory to Xen allocation pool after DOM0 is created. Fixed page-type handling -- we now correctly flush TLB if a page is unpinned after a disk read and refcnt falls to zero. --- xen/common/domain.c | 40 +++++++++++------------------------ xen/common/kernel.c | 27 +++++++++++------------ xen/common/memory.c | 10 ++++----- xen/common/page_alloc.c | 23 +++++++++++++++++--- xen/drivers/block/xen_block.c | 23 ++++++++++++++++---- xen/include/xeno/config.h | 1 + xen/include/xeno/mm.h | 18 ++++++++++++---- xen/include/xeno/multiboot.h | 5 +++++ xen/include/xeno/sched.h | 4 +++- xen/net/dev.c | 2 +- 10 files changed, 93 insertions(+), 60 deletions(-) diff --git a/xen/common/domain.c b/xen/common/domain.c index 99a29d2771..2102e29ee3 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -14,16 +14,11 @@ #include #include #include -#include #include #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED) #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) -extern int nr_mods; -extern module_t *mod; -extern unsigned char *cmdline; - rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* @@ -173,7 +168,7 @@ unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes) for ( alloc_pfns = 0; alloc_pfns < req_pages; alloc_pfns++ ) { pf = list_entry(temp, struct pfn_info, list); - pf->flags |= p->domain; + pf->flags = p->domain; pf->type_count = pf->tot_count = 0; temp = temp->next; list_del(&pf->list); @@ -366,9 +361,9 @@ static unsigned long alloc_page_from_domain(unsigned long * cur_addr, /* setup_guestos is used for building dom0 solely. other domains are built in * userspace dom0 and final setup is being done by final_setup_guestos. */ -int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) +int setup_guestos(struct task_struct *p, dom0_newdomain_t *params, + char *data_start, unsigned long data_len, char *cmdline) { - struct list_head *list_ent; char *src, *dst; int i, dom = p->domain; @@ -387,13 +382,13 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) /* Sanity! */ if ( p->domain != 0 ) BUG(); - if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) ) + if ( strncmp(data_start, "XenoGues", 8) ) { printk("DOM%d: Invalid guest OS image\n", dom); return -1; } - virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8); + virt_load_address = *(unsigned long *)(data_start + 8); if ( (virt_load_address & (PAGE_SIZE-1)) ) { printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n", @@ -407,13 +402,12 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) alloc_address <<= PAGE_SHIFT; alloc_index = p->tot_pages; - if ( (mod[nr_mods-1].mod_end-mod[0].mod_start) > - (params->memory_kb << 9) ) + if ( data_len > (params->memory_kb << 9) ) { printk("DOM%d: Guest OS image is too large\n" " (%luMB is greater than %uMB limit for a\n" " %uMB address space)\n", - dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20, + dom, data_len>>20, (params->memory_kb)>>11, (params->memory_kb)>>10); free_all_dom_mem(p); @@ -539,9 +533,9 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) __write_cr3_counted(pagetable_val(p->mm.pagetable)); /* Copy the guest OS image. */ - src = (char *)__va(mod[0].mod_start + 12); + src = (char *)(data_start + 12); dst = (char *)virt_load_address; - while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++; + while ( src < (data_start+data_len) ) *dst++ = *src++; /* Set up start info area. */ memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address)); @@ -568,23 +562,13 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) /* Add block io interface */ virt_startinfo_address->blk_ring = virt_to_phys(p->blk_ring_base); - /* We tell OS about any modules we were given. */ - if ( nr_mods > 1 ) - { - virt_startinfo_address->mod_start = - (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address; - virt_startinfo_address->mod_len = - mod[nr_mods-1].mod_end - mod[1].mod_start; - } - dst = virt_startinfo_address->cmd_line; - if ( mod[0].string ) + if ( cmdline != NULL ) { - char *modline = (char *)__va(mod[0].string); for ( i = 0; i < 255; i++ ) { - if ( modline[i] == '\0' ) break; - *dst++ = modline[i]; + if ( cmdline[i] == '\0' ) break; + *dst++ = cmdline[i]; } } *dst = '\0'; diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 61e65350b1..e329980a52 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -1,8 +1,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -27,10 +27,6 @@ struct e820entry { unsigned long type; /* type of memory segment */ }; -/* Used by domain.c:setup_guestos */ -int nr_mods; -module_t *mod; - void init_vga(void); void init_serial(void); void start_of_day(void); @@ -65,6 +61,7 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) dom0_newdomain_t dom0_params; unsigned long max_page; unsigned char *cmdline; + module_t *mod; int i; /* @@ -119,8 +116,7 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) } #endif - nr_mods = mbi->mods_count; - mod = (module_t *)__va(mbi->mods_addr); + mod = (module_t *)__va(mbi->mods_addr); /* Parse the command line. */ cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL); @@ -174,7 +170,7 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) printk("Initialised all memory on a %luMB machine\n", max_page >> (20-PAGE_SHIFT)); - init_page_allocator(mod[nr_mods-1].mod_end, MAX_MONITOR_ADDRESS); + init_page_allocator(mod[0].mod_end, MAX_MONITOR_ADDRESS); /* These things will get done by do_newdomain() for all other tasks. */ current->shared_info = (void *)get_free_page(GFP_KERNEL); @@ -195,11 +191,16 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) new_dom = do_newdomain(0, 0); if ( new_dom == NULL ) panic("Error creating domain 0\n"); - if ( setup_guestos(new_dom, &dom0_params) != 0 ) - { - panic("Could not set up DOM0 guest OS\n"); - } - update_dom_time(new_dom->shared_info); + if ( setup_guestos(new_dom, + &dom0_params, + __va(mod[0].mod_start), + mod[0].mod_end - mod[0].mod_start, + __va(mod[0].string)) + != 0 ) panic("Could not set up DOM0 guest OS\n"); + + release_bytes_to_allocator(__pa(&_end), mod[0].mod_end); + + update_dom_time(new_dom->shared_info); wake_up(new_dom); cpu_idle(); diff --git a/xen/common/memory.c b/xen/common/memory.c index 4b0848ea9d..e6139b00e5 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -275,6 +275,7 @@ static int inc_page_refcnt(unsigned long page_nr, unsigned int type) return -1; } + page->flags &= ~PG_type_mask; page->flags |= type; } @@ -286,7 +287,6 @@ static int inc_page_refcnt(unsigned long page_nr, unsigned int type) static int dec_page_refcnt(unsigned long page_nr, unsigned int type) { struct pfn_info *page; - int ret; if ( page_nr >= max_page ) { @@ -303,9 +303,8 @@ static int dec_page_refcnt(unsigned long page_nr, unsigned int type) return -1; } ASSERT(page_type_count(page) != 0); - if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask; put_page_tot(page); - return ret; + return put_page_type(page); } @@ -439,8 +438,10 @@ static int get_page(unsigned long page_nr, int writeable) page_type_count(page)); return(-1); } + page->flags &= ~PG_type_mask; page->flags |= PGT_writeable_page; } + page->flags &= ~PG_noflush; get_page_type(page); } @@ -501,10 +502,7 @@ static void put_page(unsigned long page_nr, int writeable) ((page_type_count(page) != 0) && ((page->flags & PG_type_mask) == PGT_writeable_page))); if ( writeable && (put_page_type(page) == 0) ) - { tlb_flush[smp_processor_id()] = 1; - page->flags &= ~PG_type_mask; - } put_page_tot(page); } diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 1bfeed440f..8cbf37dbef 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -110,7 +110,10 @@ static chunk_head_t free_tail[FREELIST_SIZE]; #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) -/* Initialise allocator, placing addresses [@min,@max] in free pool. */ +/* + * Initialise allocator, placing addresses [@min,@max] in free pool. + * @min and @max are PHYSICAL addresses. + */ void __init init_page_allocator(unsigned long min, unsigned long max) { int i; @@ -168,7 +171,21 @@ void __init init_page_allocator(unsigned long min, unsigned long max) } -/* Allocate 2^@order contiguous pages. */ +/* Release a PHYSICAL address range to the allocator. */ +void release_bytes_to_allocator(unsigned long min, unsigned long max) +{ + min = round_pgup (min) + PAGE_OFFSET; + max = round_pgdown(max) + PAGE_OFFSET; + + while ( min < max ) + { + __free_pages(min, 0); + min += PAGE_SIZE; + } +} + + +/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */ unsigned long __get_free_pages(int mask, int order) { int i, attempts = 0; @@ -233,7 +250,7 @@ retry: } -/* Free 2^@order pages at location @p. */ +/* Free 2^@order pages at VIRTUAL address @p. */ void __free_pages(unsigned long p, int order) { unsigned long size = 1 << (order + PAGE_SHIFT); diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c index 156dcbb94c..8901ee59fb 100644 --- a/xen/drivers/block/xen_block.c +++ b/xen/drivers/block/xen_block.c @@ -277,7 +277,8 @@ static int __buffer_is_valid(struct task_struct *p, /* If reading into the frame, the frame must be writeable. */ if ( writeable_buffer && - ((page->flags & PG_type_mask) != PGT_writeable_page) ) + ((page->flags & PG_type_mask) != PGT_writeable_page) && + (page->type_count != 0) ) { DPRINTK("non-writeable page passed for block read\n"); goto out; @@ -301,7 +302,16 @@ static void __lock_buffer(unsigned long buffer, pfn++ ) { page = frame_table + pfn; - if ( writeable_buffer ) get_page_type(page); + if ( writeable_buffer ) + { + if ( page->type_count == 0 ) + { + page->flags &= ~PG_type_mask; + /* NB. This ref alone won't cause a TLB flush. */ + page->flags |= PGT_writeable_page | PG_noflush; + } + get_page_type(page); + } get_page_tot(page); } } @@ -320,8 +330,13 @@ static void unlock_buffer(struct task_struct *p, pfn++ ) { page = frame_table + pfn; - if ( writeable_buffer && (put_page_type(page) == 0) ) - page->flags &= ~PG_type_mask; + if ( writeable_buffer && + (put_page_type(page) == 0) && + !(page->flags & PG_noflush) ) + { + __flush_tlb(); + } + page->flags &= ~PG_noflush; put_page_tot(page); } spin_unlock_irqrestore(&p->page_lock, flags); diff --git a/xen/include/xeno/config.h b/xen/include/xeno/config.h index ed08041b7d..4067f52dc7 100644 --- a/xen/include/xeno/config.h +++ b/xen/include/xeno/config.h @@ -141,6 +141,7 @@ #define capable(_c) 0 #ifndef __ASSEMBLY__ +extern unsigned long _end; /* standard ELF symbol */ extern void __out_of_line_bug(int line) __attribute__((noreturn)); #define out_of_line_bug() __out_of_line_bug(__LINE__) #endif diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h index 6605f1ed58..6a2e2e9396 100644 --- a/xen/include/xeno/mm.h +++ b/xen/include/xeno/mm.h @@ -35,6 +35,7 @@ */ void init_page_allocator(unsigned long min, unsigned long max); +void release_bytes_to_allocator(unsigned long min, unsigned long max); unsigned long __get_free_pages(int mask, int order); void __free_pages(unsigned long p, int order); #define get_free_page(_m) (__get_free_pages((_m),0)) @@ -51,10 +52,6 @@ void __free_pages(unsigned long p, int order); * with struct pfn_info and frame_table respectively. Boris Dragovic */ -/* - * This is still fatter than I'd like. Do we need the count? - * Do we need the flags? The list at least seems req'd by slab.c. - */ typedef struct pfn_info { struct list_head list; /* ->mapping has some page lists. */ unsigned long flags; /* atomic flags. */ @@ -100,6 +97,19 @@ typedef struct pfn_info { #define PGT_writeable_page (7<<24) /* has writable mappings of this page? */ #define PGT_net_rx_buf (8<<24) /* this page has been pirated by the net code. */ +/* + * This bit is sometimes set by Xen when it holds a writeable reference to a + * page that shouldn't cause a TLB flush when it is dropped. For example, a + * disk write to a page with initial type_count == 0, which returns to 0 after + * the I/O. In this case, we'd normally flush the TLB because a writeable page + * has just lost its mutually-exclusive type. But this isn't necessary here + * because the writeable reference never made it into user-accessible TLB + * (didn't make it into TLB at all, in fact). + * + * This bit is obviously nuked in a few places, for safety. + */ +#define PG_noflush (1<<28) + #define PageSlab(page) test_bit(PG_slab, &(page)->flags) #define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) #define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) diff --git a/xen/include/xeno/multiboot.h b/xen/include/xeno/multiboot.h index a61117bbbd..bdf313262b 100644 --- a/xen/include/xeno/multiboot.h +++ b/xen/include/xeno/multiboot.h @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#ifndef __MULTIBOOT_H__ +#define __MULTIBOOT_H__ + #ifndef __ELF__ #error "Build on a 32-bit ELF system" #endif @@ -79,3 +82,5 @@ typedef struct memory_map unsigned long length_high; unsigned long type; } memory_map_t; + +#endif /* __MULTIBOOT_H__ */ diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 555efd9e81..147f3c40fd 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -200,7 +200,9 @@ extern union task_union idle0_task_union; extern struct task_struct first_task_struct; extern struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu); -extern int setup_guestos(struct task_struct *p, dom0_newdomain_t *params); +extern int setup_guestos( + struct task_struct *p, dom0_newdomain_t *params, + char *data_start, unsigned long data_len, char *cmdline); extern int final_setup_guestos(struct task_struct *p, dom_meminfo_t *); struct task_struct *find_domain_by_id(unsigned int dom); diff --git a/xen/net/dev.c b/xen/net/dev.c index c992cfab04..34caf14aed 100644 --- a/xen/net/dev.c +++ b/xen/net/dev.c @@ -519,7 +519,7 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) h_pfn->tot_count = h_pfn->type_count = 1; g_pfn->tot_count = g_pfn->type_count = 0; - h_pfn->flags = g_pfn->flags & (~PG_type_mask); + h_pfn->flags = g_pfn->flags & ~PG_type_mask; if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page; g_pfn->flags = 0; -- 2.30.2